R2/CI-5 AUCCI_MI (Incomplete).R

### Part I. CI functions
## 1-5 [CI] AUCCI_MI(Incomplete):  1.5.1 AUCCI.MI

# Library ##############################################################################
# [package] mice, norm, [functoins] mice2, adp.norm, MI.norm are required: CI-1b.auxillary functions.R

# needs functions: AUC, data2xy, AUCCI, HMS.equation

# MI methods
MI.methods = data.frame(functions=c(rep("mice2",2),rep("MI.norm",3)), methods=c("pmm","logreg","simple","coinflip","adaptive"))
########################################################################################


## 1.5.1 AUCCI.MI ##################################################################### 
# For score types DO NOT use LT!!! very unstable for theta near 1

AUCCI.MI = function(data, MI.function, MI.method, score.MI = "fixed.r", m, maxit=5, CI.method, alpha=.05, LT=FALSE, variance=FALSE, MI.thetas=FALSE, lambda=FALSE, ...) {
  # note: predictorMatrix(for mice) should exclude disease
  # LT: logit transformation      # not available for RG
  # returning variance when logit=T is var(logit(theta))
  # score.MI = type of adding B to W: "fixed.r", "fixed.B"
  
  ### A. imputation stage : output = data.comp and m
  {
    # 1. when multiple imputation is already done and stored in a dataset as a list.
    # For efficient simulation
    if (class(data) == "list") {
      m = length(data)
      data.comp = data
    }
    
    # 2. when multiple imutation needs to be done for a dataframe
    # for convenience of one-time CI construction. very unefficient for large size of simulation
    else if (class(data) == "data.frame") {
      if (identical(MI.function, mice2)) {
        data.comp <- MI.function(data=data[,-1], method=MI.method, m=m, maxit=maxit, printFlag=FALSE,...)
      } else if (identical(MI.function, MI.norm)|identical(MI.function, MI.norm2)) { #MI.norm2 added (da.norm) vs MI.norm(imp.norm)
        data.comp <- MI.function(data=data[,-1], rounding=MI.method, m=m, maxit=maxit, showits=FALSE,...)
      } else {stop("the MI.function is neither mice2 nor MI.norm nor MI.norm2")}
    }
    
    # 3. Otherwise, stop running the code.
    else {stop("data is neither a dataframe nor a list of imputed datasets.")}
  }
  
  ### B. Inference stage: point estimate
  {
    mi.stat = data.frame(theta = rep(NA, m), var = rep(NA, m))
    n.x.vec = rep(NA, m)
    for (i in 1:m) {
      temp = data2xy(data.comp[[i]],disease="diseaseR", marker="marker")
      x = temp$x ; y = temp$y
      mi.stat$theta[i] = AUC(x, y,...)
      n.x.vec[i] = length(x)
      mi.stat$pXY[i] = p.XY(x,y)
      
      if ("Mee" %in% CI.method) {
        mi.stat$N.J.hat[i] = Mee.stat(x, y)$N.J.hat
      }
    }
    n = dim(data.comp[[1]])[1]
    n.x = round(mean(n.x.vec, na.rm=TRUE))
      if (n.x <= 1 & mean(n.x.vec, na.rm=TRUE) >1) {n.x <- 2} # if n.x is 1, variance is not estimable.
    n.y = n - n.x
    mi.stat$theta.LT = logit(mi.stat$theta,LT=LT)
    mi.stat$div.LT = ifelse(LT,(mi.stat$theta[i]*(1-mi.stat$theta[i]))^2,1)
    mean.MI = mean(mi.stat$theta, na.rm=TRUE)
    mean.MI.LT = mean(mi.stat$theta.LT, na.rm=TRUE)
    var.B.LT = var(mi.stat$theta.LT, na.rm=TRUE)    # => Erase when not necessary any more after using funtion Rubin
    mean.pXY = mean(mi.stat$pXY, na.rm=TRUE)
    
  }
  
  ### C. CI stage
  {
    # Categorizing CI types
    CI.Wald = c("HM1", "HM2", "NW", "CM", "Bm", "DL")
    CI.Score = c("NS1", "NS2", "Mee")
    CI.root = c("DB", "DG")
    CI.other = c("RG")
    lam <- nu <- NA # by default lamda and nu is set as NA, and is calculated if applicable
    if (CI.method %in% CI.Wald) {
      for (i in 1:m) {
        mi.stat$var.LT[i] = AUCCI(data.comp[[i]], CI.method=CI.method, disease="diseaseR", alpha=alpha, variance = TRUE, LT=LT, ...)$V.hat
      }
      # when there is only one 0 or 1, V will be Inf (actually it's not inf, but nonestimable. So eliminate them when averaging.)
      mi.var.fin = mi.stat$var.LT[is.finite(mi.stat$var.LT)]
      # If every MI cases has unestimable variance(NA or Inf), Wald type CI can not be gotten.
      if (length(mi.var.fin) > 0) {
        Rubin = Rubin(W=mean(mi.var.fin), MI=mi.stat$theta.LT, alpha=alpha, print.r = TRUE, print.nu = TRUE, print.lambda = TRUE)
        var.MI.LT = Rubin$v.final         # W + (1/m)*B for MI
        if (is.na(var.MI.LT)) {
          CI <- CI.LT <- c(NA,NA)
        } else {
          nu <- Rubin$nu             # degree of freedom for MI
          r <- Rubin$r
          if (is.na(nu) & var.MI.LT==0) {nu=Inf}
          CI.LT = CI.base(mean.MI.LT, var.MI.LT, alpha, qt, df=nu)    # logit scale (will back-transform in the end)
          CI = expit(CI.LT, LT=LT)
          lam = Rubin$lambda
        }
      } else {
        CI.LT <- CI <- c(NA,NA)
      }
    }
    
    else if (CI.method %in% CI.Score) {
      start = c( (mean.MI+.5)/2, (mean.MI+1)/2)
      start = mean.MI + c(-1,+1)*qnorm(1-alpha/2)*sqrt(mean.MI*(1-mean.MI)/n.x/n.y)
      if (score.MI == "fixed.r" & LT==FALSE) {  #if n.x==1, var is not estimable, make it population var to estimate r.
        W = V.HM(AUC=mean.MI, n.x=n.x, n.y=n.y, LT=LT, pXY=mean.pXY, NC=FALSE, sample.var=ifelse(n.x==1,FALSE,TRUE))
        r = Rubin(W=W, MI=mi.stat$theta.LT, alpha=alpha, print.r=TRUE)$r
        mi.theta.fin = mi.stat$theta.LT[is.finite(mi.stat$theta.LT)]
        if (length(mi.theta.fin)==0) {
          CI.LT <- CI <- c(NA,NA)
        } else {
        if (all(mi.theta.fin==1)) {r=1}  #if all theta_(i)'s are 1, W=0, r=undefined, V=0
        if (CI.method == "NS1") {
          CI = polyroot2(HM.coef,AUC.hat=mean.MI, n.x=n.x, n.y=n.y, pXY=mean.pXY, NC=FALSE, alpha=alpha, r=r)
        } else if (CI.method == "NS2") {
          CI = polyroot2(HM.coef,AUC.hat=mean.MI, n.x=n.x, n.y=n.y, pXY=mean.pXY, NC=TRUE, alpha=alpha, r=r)
        } else if (CI.method == "Mee") {
          CI = polyroot2(Mee.coef,AUC.hat=mean.MI, N.J.hat=mean(mi.stat$N.J.hat, na.rm=TRUE), alpha=alpha, r=r)
        }
        }
      }
      else {
        if (CI.method == "NS1") {
          CI = multiroot2(HMS.equation, start, AUC.hat=mean.MI, n.x=n.x, n.y=n.y, pXY=mean.pXY, alpha=alpha, MI=mi.stat$theta.LT, LT=LT, score.MI=score.MI, sample.var=FALSE, rtol = 1e-10, atol = 1e-10, ...)$root    
        } else if (CI.method == "NS2") {
          CI = multiroot2(HMS.equation,  start, AUC.hat=mean.MI, n.x=n.x, n.y=n.y, pXY=mean.pXY, alpha=alpha, MI=mi.stat$theta.LT, LT=LT, NC=TRUE, score.MI=score.MI, sample.var=FALSE, rtol = 1e-10, atol = 1e-10, ...)$root 
        } else if (CI.method == "Mee") {
          CI = multiroot2(Mee.equation,  start, AUC.hat=mean.MI, N.J.hat = mean(mi.stat$N.J.hat, na.rm=TRUE), alpha=alpha, MI=mi.stat$theta.LT, LT=LT, score.MI=score.MI, sample.var=FALSE, rtol = 1e-10, atol = 1e-10, ...)$root
        }
      }
      
      CI.LT = logit(CI,LT=LT)
      if (variance) {
        for (i in 1:m) {
          mi.stat$var.LT[i] = AUCCI(data.comp[[i]], CI.method=CI.method, disease="diseaseR", alpha=alpha, variance = TRUE, LT=LT, ...)$V.hat
        }
        Rubin = Rubin(W=mean(mi.stat$var.LT), MI=mi.stat$theta.LT, alpha=alpha)
        var.MI.LT = Rubin$v.final         # W + (1/m)*B for MI
      }
    }
    
    else if (CI.method %in% CI.root) {
      if (CI.method == "DB") {
        CI = DB(AUC.hat=mean.MI, n.x=n.x, n.y=n.y, alpha=alpha, MI=mi.stat$theta.LT, LT=LT)
        CI.LT = logit(CI, LT=LT)
        if (variance) {
          MI.DB = data.frame(alp = rep(NA, m), var = rep(NA, m))
          for (i in 1:m) {
            MI.DB$alp[i] = multiroot2(DB.equation, c(1,3), AUC.hat=mi.stat$theta[i], n.x=n.x, n.y=n.y, alpha=1 ,LT=LT, rtol = 1e-10, atol = 1e-10)$root[1]
            MI.DB$var.LT[i] = V.DB(MI.DB$alp[i], n.x, n.y, LT=LT, ...)
          }
          Rubin = Rubin(W=mean(MI.DB$var.LT), MI=mi.stat$theta.LT, alpha=alpha)
          var.MI.LT = Rubin$v.final         # W + (1/m)*B for MI
        }
      }
      
      else if (CI.method == "DG") {
        CI = DG(AUC.hat=mean.MI, n.x=n.x, n.y=n.y, alpha=alpha, MI=mi.stat$theta.LT, LT=LT, ...)
        CI.LT = logit(CI, LT=LT)
        if (variance) {
          MI.DG = data.frame(delta = rep(NA, m), var.LT = rep(NA, m))
          for (i in 1:m) {
            MI.DG$delta[i] = multiroot2(DG.equation, c(1,3), AUC.hat=mi.stat$theta[i], n.x=n.x, n.y=n.y, alpha=1,LT=LT, rtol = 1e-10, atol = 1e-10)$root[1]
            MI.DG$var.LT[i] = V.DG(MI.DG$delta[i], n.x, n.y, LT=LT, ...)
          }
          Rubin = Rubin(W=mean(mi.stat$var.LT), MI=mi.stat$theta.LT, alpha=alpha)
          var.MI.LT = Rubin$v.final         # W + (1/m)*B for MI
        }
      }
    }  
    else if (CI.method == "RG") {         # LT, variance not available
      MI.RG = data.frame(delta = rep(NA, m), var = rep(NA, m))
      for (i in 1:m) {
        temp = data2xy(data.comp[[i]],disease="diseaseR", marker="marker")
        x = temp$x ; y = temp$y
        MI.RG$delta[i] = probit.RG(x, y)$delta
        MI.RG$var[i] = probit.RG(x, y)$V
      }
      delta.mean = mean(MI.RG$delta)
      Rubin = Rubin(W=mean(MI.RG$var), MI=MI.RG$delta, alpha=alpha)
      var.MI = Rubin$v.final         # W + (1/m)*B for MI
      z.val = Rubin$z.val
      d.CI = delta.mean + c(-1,1)*z.val*sqrt(var.MI)
      CI = pnorm(d.CI)
      CI.LT = logit(CI, LT=LT) # logit transformation not available for RG (only for presentation purpose)
      V.MI.LT = NA                # variance not available for RG(The V above is variance in probit scale)
      mean.MI=pnorm(delta.mean)
      var.MI.LT = NA
    }
    
    else {warning(paste(CI.method,"is not available"))} 
  }
  
  result = list(AUC.hat = mean.MI, CI = CI);  if (variance) {result$V.hat = var.MI.LT}; if (MI.thetas) {result$theta.MI = mi.stat$theta; result$theta.LT = mi.stat$theta.LT }
  if (lambda) {result$lambda = lam; result$nu = nu}
  return(result)
}
gjm112/AUCCI documentation built on May 17, 2019, 6:03 a.m.